# Disappointed Expectations: Downward Mobility and Electoral Change
# Thomas Kurer and Briitta van Staalduinen
# American Political Science Review

# Supplementary Analyses
# Using SOEP v35 2018 wave (main analysis) and SOEPlong (descriptives)
# Preparation

dev.off()
cat("\014")  
# globals
options(scipen=999)

# packages

if (!require("pacman")) install.packages("pacman") 

pacman::p_load(magrittr, tidyverse, broom, hrbrthemes, plm, estimatr, sandwich, lmtest, AER, lfe, huxtable, margins, readstata13, texreg, reshape2, readxl, xtablem, interplot)

library(xtable)
library(coefplot)
library(randomForest)
library(interactions)
library(glmnet) # lasso and ridge regression for prediction
library(LARF)   # contains function for generating polynomials and interactions
library(hdm)    # lasso-based double selection for causal inference
library(caret)  # contains command for confusion matrix
library(randomForest) # random forests
library(rpart)        # decision trees
library(rpart.plot)   # library for plotting decision trees
library(grf)
library(SuperLearner)
library(truncnorm)

# ggplot theme
theme_set(theme_bw())

# define paths

datpath <- "...insert path..."
outpath <- "...insert path..."

# set seed
RNGkind(sample.kind = "Rounding")
# ensures consistent seeds across R versions
set.seed(1115)

# LOAD  ----

soep <- read.csv(paste0(datpath, "data_main.csv"))
soeplong <- read.csv(paste0(datpath, "data_appendix_long.csv"))

# Empirical Distribution and Heat Map

heat <- soep %>% 
  dplyr::select(isei88_full, isei_pred) %>% 
  dplyr::filter(!is.na(isei88_full) & !is.na(isei_pred) ) %>% 
  mutate(isei10 = ntile(isei88_full, 10), isei_pred10 = ntile(isei_pred, 10))
heat$combo <- paste(heat$isei10, heat$isei_pred10, sep="-")

heat %<>% mutate(one=1, total=sum(one)) %>% group_by(combo) %>% mutate(freq=sum(one)) %>% dplyr::filter(row_number(isei88_full) == 1) %>% ungroup() %>% mutate(relfreq=ntile(freq, 100))

ggplot(data = heat, mapping = aes(x = isei_pred10, y = isei10, fill = relfreq)) +
  geom_tile() +
  scale_fill_gradient2(low = 'blue', mid = 'white', high = 'red') + 
  scale_x_continuous(name="Intergenerational Reference Point (Father)", breaks=seq(1,10, 1)) +
  scale_y_continuous(name="Realized Status (Child)", breaks=seq(1,10, 1)) +
  theme(legend.position="none", text = element_text(size=18))
ggsave(paste0(outpath, "FigureA5a_distribution_heat.pdf"), height=6, width=6)


ggplot(soep, aes(x=isei_pred2100)) + 
  geom_jitter(aes(y=isei100), shape=16, alpha=0.1) +
  geom_smooth(aes(y=isei100), method='lm', color="red", linetype="solid", size=0.8) +
  scale_x_continuous(name="Intergenerational Reference Point (Father)", breaks=seq(0, 100, 10)) +
  scale_y_continuous(name="Realized Status (Child)", limits = c(0, 100), breaks=seq(0, 100, 10)) +
  geom_abline(intercept = 0, linetype="dotted") + theme(text = element_text(size=18))
ggsave(paste0(outpath, "FigureA5b_distribution_empirical.pdf"), height=6, width=6)

# Descriptives over Time ----

# longitudinal ----

# sample of prime age respondents
traintest_long <- soeplong %>% dplyr::select(pid, syear, age, ybirth, isei88_full, german, mback, childloc, sameloc, contains("1989"), contains("loc_"), starts_with("bula_youth"), grades, starts_with("f")) %>% 
  dplyr::select(-fybirth) %>% 
  dplyr::filter(age>44 & age<56)


traintest_long$nrmiss <- rowSums(is.na(traintest_long))

# if varying nr of missing values over years, keep year with minimal nr of missings by pid
traintest_long %<>% group_by(pid) %>% filter(nrmiss==min(nrmiss))

# randomly draw one observation by pid  
traintest_long %<>% group_by(pid) %>% sample_n(1) %>%
  dplyr::select(-fgerman, -nrmiss, -sameloc, -childloc) %>% # drop unused variables
  filter(!is.na(age)) %>% # drop observations with only household grid answered
  ungroup()

colSums(is.na(traintest_long))

# rf3: include reduced set of potential predictors (traintest2) except cohort/year/age specific vars

traintest3 <- traintest_long %>% dplyr::select(-pid, -region1989, -fisei88, -abroad1989, -loc_cntryside, -sameloc_again, -bula_youth, -age, -grades, -starts_with("bula_y"))
traintest3 <- na.omit(traintest3)
traintest3 <- data.frame(traintest3)

traintest3 <- traintest3 %>% dplyr::select(syear, isei88_full, ybirth, female, german, mback, east1989, west1989, bornafter1989, loc_largecity, loc_medcity, loc_smallcity, sameloc_still, sameloc_no, fisei88_full, fedu) 


sample3 <- sample.int(n = nrow(traintest3), size = floor(.25*nrow(traintest3)), replace = FALSE)
train3 <- traintest3[sample3, ]
test3  <- traintest3[-sample3, ]

xtrain3=data.frame(train3[,-c(1:2)])
xtest3=data.frame(test3[,-c(1:2)])
ytrain3=train3[,2]
ytest3=test3[,2]

covar3=data.frame(test3[,c(1, 3, 4)])

rf3_500_3=randomForest(ytrain3~., data=xtrain3, mtry=3, ntree=500) 
  
rf3_500_3

# specification table
overtime_spec <- importance(rf3_500_3)
print(xtable(overtime_spec, type="latex"), file=paste0(outpath, "Table_FigureA8_Spec.tex"))

  
varImpPlot(rf3_500_3, type=2)
yhattest_rf3_500_3=predict(rf3_500_3, newdata=test3)
mse_rf3_500_3 <- mean((ytest3-yhattest_rf3_500_3)^2)

isei_pred_long_rf  <- yhattest_rf3_500_3

desc <- cbind(isei_pred_long_rf, ytest3, covar3)
desc$isd <- desc$isei_pred_long_rf - desc$ytest3

overtime <- desc %>% group_by(syear, female) %>% summarise(meaniseipred=mean(isei_pred_long_rf)) 

overtime %>% ggplot(aes(x=syear, y=meaniseipred, group=factor(female), color=factor(female))) + geom_line(alpha=.4) + geom_smooth(method="loess") +
  ylab("Mean Predicted Occupational Status") +
  scale_x_continuous("Survey Year", breaks=seq(1980, 2015, 5)) +
  scale_color_discrete(name = "Gender", labels = c("male", "female")) +
  theme_bw() +
  theme(text = element_text(size=20))
ggsave(paste0(outpath, "FigureA8_isd_overtime_bygender.pdf"), height=6, width=9)



# Standardized Coefficients ----

covars2std <- paste(c("scale(isd2)", "female", "scale(age)", "mback", "scale(edu)", "factor(alf)", "factor(sameloc)", "scale(log(incomem))", "factor(bula)", "factor(region1989)"),
                collapse="+")

t2_v2_afd_std <- lm(paste("vafd*100~", covars2std, sep=""), data=soep)

coefplot::coefplot(t2_v2_afd_std,
                   coefficients=c("scale(isd2)", "scale(age)", "scale(edu)", "scale(log(incomem))"),
                   newNames=c("scale(isd2)"="Status Discordance", "female" = "Gender: Female", "scale(age)" = "Age", "scale(edu)"="Education (numeric)", "scale(log(incomem))"= "Income (log)"),
                   color=c("black"), sort="magnitude",
                   title="",
                   xlab="Standardized Coefficient: Vote for AfD", ylab="") +
  geom_vline(xintercept=t2_v2_afd_std$coefficients[2], color="red", linetype="dotted") +
  geom_vline(xintercept=(t2_v2_afd_std$coefficients[2])*(-1), color="red", linetype="dotted") +
  theme_bw() +
  theme(text = element_text(size=20))

ggsave(paste0(outpath, "FigureA9_reg_v2_vafd_std.pdf"), height=6, width=8)



# Robustness ----

# Reduced Model

covars2 <- paste(c("isd", "female", "age", "mback", "factor(edu)", "factor(alf)", "factor(sameloc)", "log(incomem)", "factor(bula)", "factor(region1989)"),
                collapse="+")

# political alienation

t1_v1_vanti <- lm(paste("vanti*100~", covars2, sep=""), data=soep)
t1_v1_novote <- lm(paste("novote*100~", covars2, sep=""), data=soep)
t1_v1_noid <- lm(paste("noid*100~", covars2, sep=""), data=soep)
t1_v1_vmain1 <- lm(paste("vmainstream1*100~", covars2, sep=""), data=soep)
t1_v1_vmain2 <- lm(paste("vmainstream2*100~", covars2, sep=""), data=soep)

screenreg(list(t1_v1_novote, t1_v1_noid, t1_v1_vanti, t1_v1_vmain1, t1_v1_vmain2), digits=3, custom.model.names=c("Abstain", "No Party ID", "Vote Radical", "Vote Mainstream1", "Vote Mainstream2"))

texreg(list(t1_v1_novote, t1_v1_noid, t1_v1_vanti, t1_v1_vmain2), digits=3, 
       label="tab:reg_v1_alienation", caption="Intergenerational Status Discordance and Political Alienation", caption.above=TRUE,
       custom.model.names=c("Abstain", "No Party ID", "Vote Radical", "Vote Mainstream"), 
       custom.coef.map = list('isd' = 'Status Discordance',
                              'female' = 'Female (1=yes)',
                              'age'='Age',
                              'mback' = 'Migration Background (1=yes)',
                              'factor(edu)1' = 'Educ: Elementary',
                              'factor(edu)2' = 'Educ: Lower Second.',
                              'factor(edu)3' = 'Educ: Secondary',
                              'factor(edu)4' = 'Educ: University Prep.',
                              'factor(edu)5' = 'Educ: Tertiary I',
                              'factor(edu)6' = 'Educ: Tertiary II',
                              'factor(alf)1' = 'Emp. Status: not in labor force',
                              'factor(alf)2' = 'Emp. Status: other',
                              'log(incomem)' = 'Income (log)',
                              'factor(sameloc)2' = 'Location: Returned',
                              'factor(sameloc)3' = 'Location: Moved Away',
                              'factor(region1989)2' = 'In 1989: West',
                              'factor(region1989)3' = 'In 1989: Abroad',
                              'factor(region1989)4' = 'In 1989: Born later',
                              '(Intercept)' = 'Intercept'),
       custom.note=paste("%stars. All models include regional (Bundesland) fixed effects."),
       file=paste0(outpath, "TableA6_reg_v1_alienation.tex"))


# party vote choice

t2_v1_rr <- lm(paste("vrr*100~", covars2, sep=""), data=soep)
t2_v1_afd <- lm(paste("vafd*100~", covars2, sep=""), data=soep)
t2_v1_cducsu <- lm(paste("vcducsu*100~", covars2, sep=""), data=soep)
t2_v1_msr <- lm(paste("vmsr*100~", covars2, sep=""), data=soep)
t2_v1_msl <- lm(paste("vmsl*100~", covars2, sep=""), data=soep)
t2_v1_spd <- lm(paste("vspd*100~", covars2, sep=""), data=soep)
t2_v1_gruen <- lm(paste("vgruen*100~", covars2, sep=""), data=soep)
t2_v1_linke <- lm(paste("vlinke*100~", covars2, sep=""), data=soep)

screenreg(list(t2_v1_rr, t2_v1_afd, t2_v1_cducsu, t2_v1_msr, t2_v1_msl, t2_v1_gruen, t2_v1_spd, t2_v1_linke), digits=3, custom.model.names=c("rr", "afd", "cducsu", "msr", "msl", "green", "spd", "linke"))

texreg(list(t2_v1_rr, t2_v1_afd, t2_v1_cducsu, t2_v1_msr, t2_v1_msl, t2_v1_gruen, t2_v1_spd, t2_v1_linke), digits=3, 
       label="tab:reg_v1_partyvote", caption="Intergenerational Status Discordance and Party Choice", caption.above=TRUE,
       custom.model.names=c("RadRight", "AFD", "CDU/CSU", "MS Right", "MS Left", "Green", "SPD", "Linke"), 
       custom.coef.map = list('isd' = 'Status Discordance',
                              'female' = 'Female (1=yes)',
                              'age'='Age',
                              'mback' = 'Migration Background (1=yes)',
                              'factor(edu)1' = 'Educ: Elementary',
                              'factor(edu)2' = 'Educ: Lower Second.',
                              'factor(edu)3' = 'Educ: Secondary',
                              'factor(edu)4' = 'Educ: University Prep.',
                              'factor(edu)5' = 'Educ: Tertiary I',
                              'factor(edu)6' = 'Educ: Tertiary II',
                              'factor(alf)1' = 'Emp. Status: not in labor force',
                              'factor(alf)2' = 'Emp. Status: other',
                              'log(incomem)' = 'Income (log)',
                              'factor(sameloc)2' = 'Location: Returned',
                              'factor(sameloc)3' = 'Location: Moved Away',
                              'factor(region1989)2' = 'In 1989: West',
                              'factor(region1989)3' = 'In 1989: Abroad',
                              'factor(region1989)4' = 'In 1989: Born later',
                              '(Intercept)' = 'Intercept'),
       custom.note=paste("%stars. All models include regional (Bundesland) fixed effects."),
       file=paste0(outpath, "TableA7_reg_v1_partyvote.tex"))

plotreg(list(t2_v1_linke, t2_v1_spd, t2_v1_gruen, t2_v1_msl, t2_v1_msr, t2_v1_cducsu, t2_v1_afd, t2_v1_rr), 
        custom.model.names=c("Linke", "SPD", "Green", "Mainstream Left", "Mainstream Right", "CDU/CSU", "AfD", "Radical Right"), 
        custom.coef.map=list('isd'='Marginal Effect of Status Discordance'), 
        custom.note ="Outer bars are 0.95 CIs",
        signif.outer = TRUE,
        type = "forest")  + theme(text = element_text(size=16))
ggsave(paste0(outpath, "FigureA10_reg_v1_partyvote_coefs.pdf"), height=6, width=8)

# Percentile/Rank Version

# political alienation 


covarsrank <- paste(c("isd2100", "female", "age", "mback", "factor(edu)", "factor(alf)", "factor(sameloc)", "log(incomem)", "factor(bula)", "factor(region1989)"),
                collapse="+")



t1_rank_vanti <- lm(paste("vanti*100~", covarsrank, sep=""), data=soep)
t1_rank_novote <- lm(paste("novote*100~", covarsrank, sep=""), data=soep)
t1_rank_noid <- lm(paste("noid*100~", covarsrank, sep=""), data=soep)
t1_rank_vmain1 <- lm(paste("vmainstream1*100~", covarsrank, sep=""), data=soep)
t1_rank_vmain2 <- lm(paste("vmainstream2*100~", covarsrank, sep=""), data=soep)

screenreg(list(t1_rank_novote, t1_rank_noid, t1_rank_vanti, t1_rank_vmain1, t1_rank_vmain2), digits=3, custom.model.names=c("Abstain", "No Party ID", "Vote Radical", "Vote Mainstream1", "Vote Mainstream2"))

texreg(list(t1_rank_novote, t1_rank_noid, t1_rank_vanti, t1_rank_vmain2), digits=3, 
       label="tab:reg_rank_alienation", caption="Intergenerational Status Discordance and Political Alienation", caption.above=TRUE,
       custom.model.names=c("Abstain", "No Party ID", "Vote Radical", "Vote Mainstream"), 
       custom.coef.map = list('isd2100' = 'Status Discordance',
                              'female' = 'Female (1=yes)',
                              'age'='Age',
                              'mback' = 'Migration Background (1=yes)',
                              'factor(edu)1' = 'Educ: Elementary',
                              'factor(edu)2' = 'Educ: Lower Second.',
                              'factor(edu)3' = 'Educ: Secondary',
                              'factor(edu)4' = 'Educ: University Prep.',
                              'factor(edu)5' = 'Educ: Tertiary I',
                              'factor(edu)6' = 'Educ: Tertiary II',
                              'factor(alf)1' = 'Emp. Status: not in labor force',
                              'factor(alf)2' = 'Emp. Status: other',
                              'log(incomem)' = 'Income (log)',
                              'factor(sameloc)2' = 'Location: Returned',
                              'factor(sameloc)3' = 'Location: Moved Away',
                              'factor(region1989)2' = 'In 1989: West',
                              'factor(region1989)3' = 'In 1989: Abroad',
                              'factor(region1989)4' = 'In 1989: Born later',
                              '(Intercept)' = 'Intercept'),
       custom.note=paste("%stars. All models include regional (Bundesland) fixed effects."),
       file=paste0(outpath, "TableA8_reg_rank_alienation.tex"))


# party vote choice

t2_rank_rr <- lm(paste("vrr*100~", covarsrank, sep=""), data=soep)
t2_rank_afd <- lm(paste("vafd*100~", covarsrank, sep=""), data=soep)
t2_rank_cducsu <- lm(paste("vcducsu*100~", covarsrank, sep=""), data=soep)
t2_rank_msr <- lm(paste("vmsr*100~", covarsrank, sep=""), data=soep)
t2_rank_msl <- lm(paste("vmsl*100~", covarsrank, sep=""), data=soep)
t2_rank_spd <- lm(paste("vspd*100~", covarsrank, sep=""), data=soep)
t2_rank_gruen <- lm(paste("vgruen*100~", covarsrank, sep=""), data=soep)
t2_rank_linke <- lm(paste("vlinke*100~", covarsrank, sep=""), data=soep)

screenreg(list(t2_rank_rr, t2_rank_afd, t2_rank_cducsu, t2_rank_msr, t2_rank_msl, t2_rank_gruen, t2_rank_spd, t2_rank_linke), digits=3, custom.model.names=c("rr", "afd", "cducsu", "msr", "msl", "green", "spd", "linke"))

texreg(list(t2_rank_rr, t2_rank_afd, t2_rank_cducsu, t2_rank_msr, t2_rank_msl, t2_rank_gruen, t2_rank_spd, t2_rank_linke), digits=3, 
       label="tab:reg_rank_partyvote", caption="Intergenerational Status Discordance and Party Choice", caption.above=TRUE,
       custom.model.names=c("RadRight", "AFD", "CDU/CSU", "MS Right", "MS Left", "Green", "SPD", "Linke"), 
       custom.coef.map = list('isd2100' = 'Status Discordance',
                              'female' = 'Female (1=yes)',
                              'age'='Age',
                              'mback' = 'Migration Background (1=yes)',
                              'factor(edu)1' = 'Educ: Elementary',
                              'factor(edu)2' = 'Educ: Lower Second.',
                              'factor(edu)3' = 'Educ: Secondary',
                              'factor(edu)4' = 'Educ: University Prep.',
                              'factor(edu)5' = 'Educ: Tertiary I',
                              'factor(edu)6' = 'Educ: Tertiary II',
                              'factor(alf)1' = 'Emp. Status: not in labor force',
                              'factor(alf)2' = 'Emp. Status: other',
                              'log(incomem)' = 'Income (log)',
                              'factor(sameloc)2' = 'Location: Returned',
                              'factor(sameloc)3' = 'Location: Moved Away',
                              'factor(region1989)2' = 'In 1989: West',
                              'factor(region1989)3' = 'In 1989: Abroad',
                              'factor(region1989)4' = 'In 1989: Born later',
                              '(Intercept)' = 'Intercept'),
       custom.note=paste("%stars. All models include regional (Bundesland) fixed effects."),
       file=paste0(outpath, "TableA9_reg_rank_partyvote.tex"))

plotreg(list(t2_rank_linke, t2_rank_spd, t2_rank_gruen, t2_rank_msl, t2_rank_msr, t2_rank_cducsu, t2_rank_afd, t2_rank_rr), 
        custom.model.names=c("Linke", "SPD", "Green", "Mainstream Left", "Mainstream Right", "CDU/CSU", "AfD", "Radical Right"), 
        custom.coef.map=list('isd2100'='Marginal Effect of Status Discordance'), 
        custom.note ="Outer bars are 0.95 CIs",
        signif.outer = TRUE,
        type = "forest")  + theme(text = element_text(size=16))
ggsave(paste0(outpath, "FigureA11_reg_rank_partyvote_coefs.pdf"), height=6, width=8)

# East vs. West

east_now <- soep %>% filter(east==1)
west_now <- soep %>% filter(east==0)

east_1989 <- soep %>% filter(east1989==1)
west_1989 <- soep %>% filter(east1989==0)

east_birth <- soep %>% filter(birtheast==1)
west_birth <- soep %>% filter(birtheast==0)

covarsew <- paste(c("isd2", "female", "age", "mback", "factor(edu)", "factor(alf)", "factor(sameloc)", "log(incomem)"),
                collapse="+")

t2_v2_rr_east_now <- lm(paste("vrr*100~", covarsew, sep=""), data=east_now)
t2_v2_afd_east_now <- lm(paste("vafd*100~", covarsew, sep=""), data=east_now)
t2_v2_cducsu_east_now <- lm(paste("vcducsu*100~", covarsew, sep=""), data=east_now)
t2_v2_msr_east_now <- lm(paste("vmsr*100~", covarsew, sep=""), data=east_now)
t2_v2_msl_east_now <- lm(paste("vmsl*100~", covarsew, sep=""), data=east_now)
t2_v2_spd_east_now <- lm(paste("vspd*100~", covarsew, sep=""), data=east_now)
t2_v2_gruen_east_now <- lm(paste("vgruen*100~", covarsew, sep=""), data=east_now)
t2_v2_linke_east_now <- lm(paste("vlinke*100~", covarsew, sep=""), data=east_now)

plotreg(list(t2_v2_linke_east_now, t2_v2_spd_east_now, t2_v2_gruen_east_now, t2_v2_msl_east_now, t2_v2_msr_east_now, t2_v2_cducsu_east_now, t2_v2_afd_east_now, t2_v2_rr_east_now), 
        custom.model.names=c("Linke", "SPD", "Green", "Mainstream Left", "Mainstream Right", "CDU/CSU", "AfD", "Radical Right"), 
        custom.coef.map=list('isd2'='Marginal Effect of Status Discordance'), 
        custom.note ="Outer bars are 0.95 CIs",
        signif.outer = TRUE,
        type = "forest")  + theme(text = element_text(size=16))
ggsave(paste0(outpath, "FigureA12_reg_v2_partyvote_coefs_east_now.pdf"), height=6, width=8)

t2_v2_rr_west_now <- lm(paste("vrr*100~", covarsew, sep=""), data=west_now)
t2_v2_afd_west_now <- lm(paste("vafd*100~", covarsew, sep=""), data=west_now)
t2_v2_cducsu_west_now <- lm(paste("vcducsu*100~", covarsew, sep=""), data=west_now)
t2_v2_msr_west_now <- lm(paste("vmsr*100~", covarsew, sep=""), data=west_now)
t2_v2_msl_west_now <- lm(paste("vmsl*100~", covarsew, sep=""), data=west_now)
t2_v2_spd_west_now <- lm(paste("vspd*100~", covarsew, sep=""), data=west_now)
t2_v2_gruen_west_now <- lm(paste("vgruen*100~", covarsew, sep=""), data=west_now)
t2_v2_linke_west_now <- lm(paste("vlinke*100~", covarsew, sep=""), data=west_now)

plotreg(list(t2_v2_linke_west_now, t2_v2_spd_west_now, t2_v2_gruen_west_now, t2_v2_msl_west_now, t2_v2_msr_west_now, t2_v2_cducsu_west_now, t2_v2_afd_west_now, t2_v2_rr_west_now), 
        custom.model.names=c("Linke", "SPD", "Green", "Mainstream Left", "Mainstream Right", "CDU/CSU", "AfD", "Radical Right"), 
        custom.coef.map=list('isd2'='Marginal Effect of Status Discordance'), 
        custom.note ="Outer bars are 0.95 CIs",
        signif.outer = TRUE,
        type = "forest")  + theme(text = element_text(size=16))
ggsave(paste0(outpath, "FigureA13_reg_v2_partyvote_coefs_west_now.pdf"), height=6, width=8)

# Radical Left in the (former) East

t2_v2_linke_east_1989 <- lm(paste("vlinke*100~", covarsew, sep=""), data=east_1989)
t2_v2_linke_east_birth <- lm(paste("vlinke*100~", covarsew, sep=""), data=east_birth)

t2_v2_linke_west_1989 <- lm(paste("vlinke*100~", covarsew, sep=""), data=west_1989)
t2_v2_linke_west_birth <- lm(paste("vlinke*100~", covarsew, sep=""), data=west_birth)

covarshs <- paste(c("isd2*hs", "female", "age", "mback", "factor(alf)", "factor(sameloc)", "log(incomem)"),
                collapse="+")

t2_v2_linke_east_now_hs <- lm(paste("vlinke*100~", covarshs, sep=""), data=east_now)
t2_v2_linke_east_1989_hs <- lm(paste("vlinke*100~", covarshs, sep=""), data=east_1989)
t2_v2_linke_east_birth_hs <- lm(paste("vlinke*100~", covarshs, sep=""), data=east_birth)
t2_v2_linke_west_now_hs <- lm(paste("vlinke*100~", covarshs, sep=""), data=west_now)
t2_v2_linke_west_1989_hs <- lm(paste("vlinke*100~", covarshs, sep=""), data=west_1989)
t2_v2_linke_west_birth_hs <- lm(paste("vlinke*100~", covarshs, sep=""), data=west_birth)

texreg(list(t2_v2_linke_east_birth, t2_v2_linke_east_1989, t2_v2_linke_east_now, t2_v2_linke_west_birth, t2_v2_linke_west_1989, t2_v2_linke_west_now), digits=3, 
       label="tab:reg_linke_eastwest", caption="Status Discordance and Radical Left in East and West", caption.above=TRUE,
       custom.model.names=c("Birth East", "East 1989", "East Now", "Birth West", "West 1989", "West Now"), 
       custom.coef.map = list('isd2' = 'Status Discordance',
                              'female' = 'Female (1=yes)',
                              'age'='Age',
                              'mback' = 'Migration Background (1=yes)',
                              'factor(edu)1' = 'Educ: Elementary',
                              'factor(edu)2' = 'Educ: Lower Second.',
                              'factor(edu)3' = 'Educ: Secondary',
                              'factor(edu)4' = 'Educ: University Prep.',
                              'factor(edu)5' = 'Educ: Tertiary I',
                              'factor(edu)6' = 'Educ: Tertiary II',
                              'factor(alf)1' = 'Emp. Status: not in labor force',
                              'factor(alf)2' = 'Emp. Status: other',
                              'log(incomem)' = 'Income (log)',
                              'factor(sameloc)2' = 'Location: Returned',
                              'factor(sameloc)3' = 'Location: Moved Away',
                              '(Intercept)' = 'Intercept'),
       custom.note=paste("%stars."),
       file=paste0(outpath, "TableA10_reg_linke_eastwest.tex"))

texreg(list(t2_v2_linke_east_birth_hs, t2_v2_linke_east_1989_hs, t2_v2_linke_east_now_hs, t2_v2_linke_west_birth_hs, t2_v2_linke_west_1989_hs, t2_v2_linke_west_now_hs), digits=3, 
       label="tab:reg_linke_eastwest", caption="Status Discordance and Radical Left in East and West, by Education", caption.above=TRUE,
       custom.model.names=c("Birth East", "East 1989", "East Now", "Birth West", "West 1989", "West Now"), 
       custom.coef.map = list('isd2' = 'Status Discordance',
                              'hs' = 'College',
                              'isd2:hs' = 'SD X College',
                              'female' = 'Female (1=yes)',
                              'age'='Age',
                              'mback' = 'Migration Background (1=yes)',
                              'factor(alf)1' = 'Emp. Status: not in labor force',
                              'factor(alf)2' = 'Emp. Status: other',
                              'log(incomem)' = 'Income (log)',
                              'factor(sameloc)2' = 'Location: Returned',
                              'factor(sameloc)3' = 'Location: Moved Away',
                              'factor(region1989)2' = 'In 1989: West',
                              'factor(region1989)3' = 'In 1989: Abroad',
                              'factor(region1989)4' = 'In 1989: Born later',
                              '(Intercept)' = 'Intercept'),
       custom.note=paste("%stars."),
       file=paste0(outpath, "TableA11_reg_linke_eastwest_hs.tex"))

# Sensitivity ----

# model comparison across algorithm

vars <- c("rf1_1000_2", "rf1_1000_3", "rf1_1000_4", "rf1_1000_5", "rf1_1000_10", "rf1_500_2", "rf1_500_3", "rf1_500_4", "rf1_500_5", "rf1_500_10", "rf2_1000_2", "rf2_1000_3", "rf2_1000_4", "rf2_1000_5", "rf2_500_2", "rf2_500_3", "rf2_500_4", "rf2_500_5")

# harmonize var classes
soep$female <- as.factor(soep$female)

for (v in vars) {
  
soep[paste0("isei_pred_", v)] <- predict(eval(parse(text=v)), newdata=soep)
soep[paste0("isd_", v)] <- soep[paste0("isei_pred_", v)]-soep$isei88_full

}

isdvars <- c("isd_rf1_1000_2", "isd_rf1_1000_3", "isd_rf1_1000_4", "isd_rf1_1000_5", "isd_rf1_1000_10", "isd_rf1_500_2", "isd_rf1_500_3", "isd_rf1_500_4", "isd_rf1_500_5", "isd_rf1_500_10", "isd_rf2_1000_2", "isd_rf2_1000_3", "isd_rf2_1000_4", "isd_rf2_1000_5", "isd_rf2_500_2", "isd_rf2_500_3", "isd_rf2_500_4", "isd_rf2_500_5") #,"sl1", "sl2")

isd_corr <- soep %>% dplyr::select(all_of(isdvars))
cor(isd_corr, use="complete")


formula <- list(); model <- list(); coef <- list(); se <- list(); pval <- list()

for (i in 1:length(isdvars)) {
  formula[[i]] = paste0("vrr*100", " ~ ", paste(c(isdvars[i], "female", "age", "mback", "factor(edu)", "factor(alf)", "factor(sameloc)", "log(incomem)", "factor(bula)", "factor(region1989)"),
                collapse="+"))
  model[[i]] = summary(lm(formula[[i]], data=soep))
  coef[[i]]=model[[i]]$coefficient[2,1]
    se[[i]]=model[[i]]$coefficient[2,2]
  pval[[i]] = model[[i]]$coefficient[2,4]

}

robust_vrr <- data.frame(cbind(vars, round(unlist(coef), 3), round(unlist(se), 3), round(unlist(pval), 6)))
robust_vrr$pval <- ifelse(as.numeric(as.character(robust_vrr$V4))<0.001, "<.001", ">0.001")

robust_vrr$dat <- as.numeric(gsub("_", "", substr(robust_vrr$vars, 3,4)))
robust_vrr$npredictors <- ifelse(robust_vrr$dat==1, "extended", "reduced")
robust_vrr$spec <- substr(robust_vrr$vars, 5,nchar(as.character(robust_vrr$vars)))


robust_vrr_adj <- robust_vrr %>% dplyr::select(npredictors, spec, V2, V3, pval) %>% dplyr::rename(est=V2, se=V3) %>% arrange(est)

print(xtable(robust_vrr_adj, type = "latex"), file =paste0(outpath,"TableA12_sensitivity.tex"))

# Attitudes ----

covars_att <- paste(c("isd2", "factor(hs)", "female", "age", "mback", "factor(alf)", "factor(sameloc)", "log(incomem)", "factor(bula)", "factor(region1989)"),
                collapse="+")

covars_att_int <- paste(c("isd2*factor(hs)", "female", "age", "mback", "factor(alf)", "factor(sameloc)", "log(incomem)", "factor(bula)", "factor(region1989)"),
                collapse="+")

ta13_v2_1 <- lm(paste("polintr~", covars_att, sep=""), data=soep)
ta13_v2_2 <- lm(paste("worry_mig~", covars_att, sep=""), data=soep)
ta13_v2_3 <- lm(paste("worry_econsoc~", covars_att, sep=""), data=soep)
ta13_v2_4 <- lm(paste("worry_econego~", covars_att, sep=""), data=soep)


ta13_v2_5 <- lm(paste("polintr~", covars_att_int, sep=""), data=soep)
ta13_v2_6 <- lm(paste("worry_mig~", covars_att_int, sep=""), data=soep)
ta13_v2_7 <- lm(paste("worry_econsoc~", covars_att_int, sep=""), data=soep)
ta13_v2_8 <- lm(paste("worry_econego~", covars_att_int, sep=""), data=soep)

texreg(list(ta13_v2_1, ta13_v2_5, ta13_v2_2, ta13_v2_6, ta13_v2_3, ta13_v2_7, ta13_v2_4, ta13_v2_8), digits=3, 
        label="tab:reg_v2_attitudes_hs", caption="Status Discordance and Attitudes, Direct and by Education", caption.above=TRUE,
        custom.model.names=c("Pol Interest", "Pol Interest", "Worry: Mig", "Worry: Mig", "Worry: Econego", "Worry: Econego", "Worry: Econsoc", "Worry: Econsoc"),
       custom.coef.map = list('isd2' = 'Status Discordance',
                              'factor(hs)1' = 'College',
                              'isd2:factor(hs)1' = 'SD X College',
                              'female1' = 'Female (1=yes)',
                              'age'='Age',
                              'mback' = 'Migration Background (1=yes)',
                              'factor(alf)1' = 'Emp. Status: not in labor force',
                              'factor(alf)2' = 'Emp. Status: other',
                              'log(incomem)' = 'Income (log)',
                              'factor(sameloc)2' = 'Location: Returned',
                              'factor(sameloc)3' = 'Location: Moved Away',
                              'factor(region1989)2' = 'In 1989: West',
                              'factor(region1989)3' = 'In 1989: Abroad',
                              'factor(region1989)4' = 'In 1989: Born later',
                              '(Intercept)' = 'Intercept'),
       custom.note=paste("%stars. All models include regional (Bundesland) fixed effects."),
       file=paste0(outpath, "TableA13_reg_v2_attitudes_hs.tex"))



